package org.xqh.test.yzs.asrcheck;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.xqh.test.NumberUtils;
import org.xqh.utils.excel.ExcelExportUtil;
import org.xqh.utils.excel.ExcelReader;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.util.FileCopyUtils;
import org.springframework.util.StringUtils;

import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.*;
import java.util.stream.Collectors;

/**
 * @ClassName SelectAsrErrorCase
 * @Description 处理ASR识别错误 语料
 * @Author xuqianghui
 * @Date 2019/12/26 16:46
 * @Version 1.0
 */
public class SelectAsrErrorCase {

    // Base directory that every other path in this class is built from.
    private final static String root_path = "E:\\document\\yzs\\program\\运营数据分析\\";

    // Working directory of the batch being processed; most inputs and outputs live here.
    private final static String work_path = root_path + "小茂音箱12.23-12.29标注\\";

    private final static String online_audio_dir = root_path + "线上音频";// directory holding all audio downloaded from production

    // Destination directory for copied audio; copyExcelContainsAudio creates "mp3" and "pcm" sub-directories under it.
    private final static String copy_audio_path = work_path + "需验证音频\\";

    private final static String hot_dict_file = work_path + "添加热词.txt";// hot words that have already been added (only referenced from commented-out code in this file)

    private final static String export_excel = work_path + "错误语料+sessionId.xlsx";// exported excel: written by copyExcelContainsAudio, read by calcSuccessRate

    private final static String export_excel2 = work_path + "export_excel2.xlsx";// exported excel: main passes it as the output file of calcSuccessRate

    private final static String export_excel3 = root_path + "export_excel3.xlsx";// exported excel (not referenced by the code in this file)

    private final static String export_excel4 = root_path + "export_excel4.xlsx";// exported excel (not referenced by the code in this file)

    private final static String export_success_rate_result = root_path + "success_rate.xlsx";// success-rate result (not referenced by the code in this file)

    // Prefix that parseExcel strips from the start of an utterance.
    static String oneshotKey = "小茂小茂";

    // Chinese full stop; parseExcel strips it when an utterance ends with it.
    private final static String juhao = "。";

    // File-name suffixes used to recognise audio files and to derive the session id from the file name.
    private final static String suffix_mp3 = ".mp3";

    private final static String suffix_pcm = ".pcm";

    /**
     * Directory names that are scanned when collecting audio files.
     * copyAudioFile skips every directory below the first level whose absolute path
     * does not contain one of these names.
     */
    private final static List<String> copyAudioDirectoryList = Arrays.asList(
            "2019-12-23-ym-hotel",
            "2019-12-24-ym-hotel",
            "2019-12-25-ym-hotel",
            "2019-12-26-ym-hotel",
            "2019-12-27-ym-hotel",
            "2019-12-28-ym-hotel",
            "2019-12-29-ym-hotel"
    );

    /**
     * Entry point. Currently runs only the success-rate export for the batch under
     * {@code work_path}; the other steps are kept as commented-out calls and are
     * switched on by hand.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Alternative steps, disabled:
        //   exportErrorCaseNums();
        //   copyExcelContainsAudio();
        //   calcSuccessRate(work_path + "uat环境识别结果-last-all-result.xlsx", export_excel2);

        final String asrResultFile = work_path + "uat环境识别结果-last-all-result.xlsx";
        final String finalReportFile = work_path + "最终结果.xlsx";
        exportSuccessRateResult(asrResultFile, export_excel2, finalReportFile);
    }


    /**
     * Computes, for every labelled utterance, whether the re-run ASR result contains the
     * labelled text, and exports the joined rows to an excel file.
     *
     * <p>Labelled rows are read from {@code export_excel} (column 0 = labelled text,
     * column 1 = production session id). ASR results are read from {@code asrRetFile}
     * (column 0 = production session id, column 1 = ASR text, column 3 = session id of the
     * re-run). The two are joined on the production session id; when several result rows share
     * an id the first one is used. A labelled row without a matching result keeps
     * {@code asrRet}, {@code sessionId} and {@code correct} as {@code null}.
     *
     * @param asrRetFile   path of the excel file with the ASR results exported from the test
     *                     environment after the hot words were added
     * @param outExcelFile path of the excel file the joined rows are written to
     * @return the labelled rows, enriched with ASR text, re-run session id and the
     *         {@code "true"}/{@code "false"} correctness flag
     */
    public static List<AsrRetTmp> calcSuccessRate(String asrRetFile, String outExcelFile) {
        // Parse the production problem audio and its labelled tts.
        List<AsrRetTmp> bzList = Lists.newArrayList();
        for (String[] array : ExcelReader.getExcelData(new File(export_excel), 1)) {
            if (checkStringArray(2, array)) {
                bzList.add(AsrRetTmp.builder()
                        .tts(array[0])
                        .prodSessionId(array[1])
                        .build());
            }
        }

        // Parse the recognition results exported from the test environment and index them by
        // production session id, so the join below is a map lookup instead of a scan per row.
        Map<String, AsrRetTmp> hotRetBySessionId = new HashMap<>();
        for (String[] array : ExcelReader.getExcelData(new File(asrRetFile), 1)) {
            if (checkStringArray(3, array)) {
                AsrRetTmp hotRet = AsrRetTmp.builder()
                        .prodSessionId(array[0])
                        .asrRet(array[1])
                        // The row guard only guarantees three cells; a row without a fourth
                        // cell is kept with a null session id instead of failing on array[3].
                        .sessionId(array.length > 3 ? array[3] : null)
                        .build();
                // putIfAbsent keeps the first row seen for an id.
                hotRetBySessionId.putIfAbsent(hotRet.getProdSessionId(), hotRet);
            }
        }

        for (AsrRetTmp b : bzList) {
            AsrRetTmp match = hotRetBySessionId.get(b.getProdSessionId());
            if (match == null) {
                continue;
            }
            b.setAsrRet(match.getAsrRet());
            // Flag whether the recognition is correct; empty cells may come back as null, which
            // counts as "not recognised" rather than raising a NullPointerException.
            boolean recognised = b.getAsrRet() != null
                    && b.getTts() != null
                    && b.getAsrRet().contains(b.getTts());
            b.setCorrect(String.valueOf(recognised));
            b.setSessionId(match.getSessionId());
        }

        String[] headers = {"tts", "prod-sessionId", "asr识别结果", "test-sessionId", "是否正确"};
        String[] properties = {"tts", "prodSessionId", "asrRet", "sessionId", "correct"};
        ExcelExportUtil.export(headers, properties, bzList, false, outExcelFile);
        return bzList;
    }

    /**
     * Exports the recognition success rate per utterance.
     *
     * <p>Reads the utterance/count/session-id rows from
     * {@code work_path + "识别错误语料+错误次数+sessionId.xlsx"} (column 0 = text, column 1 = count,
     * column 2 = JSON array of session ids), runs {@link #calcSuccessRate(String, String)} and,
     * for every utterance, counts the results that belong to one of its session ids and are
     * flagged {@code "true"}. The rate is {@code correctNum * 100 / num} followed by {@code "%"},
     * or {@code "0%"} when nothing was recognised correctly.
     *
     * @param asrRetFile   forwarded to {@code calcSuccessRate} as the ASR result file
     * @param outExcelFile forwarded to {@code calcSuccessRate} as its output file
     * @param lastOutFile  path of the excel file the per-utterance report is written to
     */
    public static void exportSuccessRateResult(String asrRetFile, String outExcelFile, String lastOutFile) {
        File countsFile = new File(work_path + "识别错误语料+错误次数+sessionId.xlsx");
        List<AsrCaseDemo> cases = Lists.newArrayList();
        for (String[] row : ExcelReader.getExcelData(countsFile, 1)) {
            if (!checkStringArray(3, row)) {
                continue;
            }
            AsrCaseDemo item = AsrCaseDemo.builder()
                    .text(row[0])
                    // The count cell is parsed as a double first and then truncated to an int.
                    .num(Double.valueOf(row[1]).intValue())
                    .sessionIdsJson(row[2])
                    .build();
            item.setSessionIds(JSONArray.parseArray(item.getSessionIdsJson(), String.class));
            cases.add(item);
        }

        List<AsrRetTmp> asrRows = calcSuccessRate(asrRetFile, outExcelFile);
        for (AsrCaseDemo item : cases) {
            int hits = 0;
            for (AsrRetTmp asrRow : asrRows) {
                boolean sameSession = item.getSessionIds().contains(asrRow.getProdSessionId());
                if (sameSession && "true".equals(asrRow.getCorrect())) {
                    hits++;
                }
            }
            item.setCorrectNum(hits);

            if (item.getCorrectNum() > 0) {
                BigDecimal scaledHits = new BigDecimal(item.getCorrectNum() * 100);
                BigDecimal total = new BigDecimal(item.getNum());
                item.setCorrectRate(NumberUtils.divide(scaledHits, total).toString() + "%");
            } else {
                item.setCorrectRate("0%");
            }
        }

        String[] headers = {"tts", "总个数", "识别正确个数", "正确率", "sessionIdsJson"};
        String[] properties = {"text", "num", "correctNum", "correctRate", "sessionIdsJson"};
        ExcelExportUtil.export(headers, properties, cases, false, lastOutFile);
    }

    /**
     * Checks that a row has at least {@code len} cells and that its {@code len}-th cell
     * (index {@code len - 1}) contains text.
     *
     * @param len   required number of cells; values below 1 name no cell to inspect and
     *              therefore yield {@code false} instead of an out-of-bounds access
     * @param array the row to check, may be {@code null}
     * @return {@code true} only when the row is non-null, long enough, and the cell at
     *         {@code len - 1} has text according to {@code StringUtils.hasText}
     */
    public static boolean checkStringArray(int len, String[] array) {
        if (len < 1 || array == null || array.length < len) {
            return false;
        }
        return StringUtils.hasText(array[len - 1]);
    }

//    /**
//     * 获取 添加的热词数据
//     * @return
//     */
//    public static List<String> getHotDictList(){
//        List<String> hotDictList = ReadTxtFileUtils.readTxt(new File(hot_dict_file));
//        return hotDictList;
//    }

    /**
     * Copies the audio files that belong to the session ids listed in the excel file and
     * exports one (utterance, sessionId) row per session id for which audio was found.
     *
     * <p>The session ids come from {@link #exportErrorCaseNums()}; the available audio is
     * collected from {@code online_audio_dir} by {@code copyAudioFile}. The rows are written
     * to {@code export_excel}. A row is exported even when the copy itself is skipped or fails.
     */
    public static void copyExcelContainsAudio() {
        List<AsrCaseDemo> perCaseList = Lists.newArrayList();
        List<AsrCaseDemo> complexList = exportErrorCaseNums();
        Map<String, FileType> fileMap = Maps.newHashMap();// all audio files, keyed by session id
        copyAudioFile(new File(online_audio_dir), fileMap, 0);

        for (AsrCaseDemo acd : complexList) {
            for (String sid : acd.getSessionIds()) {
                FileType ft = fileMap.get(sid);
                if (ft == null) {
                    // no audio was found for this session id
                    continue;
                }
                perCaseList.add(AsrCaseDemo.builder()
                        .text(acd.getText())
                        .hotDict(true)
                        .sessionId(sid)
                        .build());
                copyAudioPair(ft, sid);
            }
        }

        String[] headers = {"语料", "sessionId"};
        String[] properties = {"text", "sessionId"};
        ExcelExportUtil.export(headers, properties, perCaseList, false, export_excel);
    }

    /**
     * Copies the mp3 and pcm file of one session into the "mp3" and "pcm" sub-directories of
     * {@code copy_audio_path}. Nothing is copied unless both source files are known and
     * neither target file exists yet.
     */
    private static void copyAudioPair(FileType ft, String sid) {
        File mp3Dir = new File(copy_audio_path + "mp3");
        File pcmDir = new File(copy_audio_path + "pcm");
        if (!mp3Dir.exists()) {
            mp3Dir.mkdirs();
        }
        if (!pcmDir.exists()) {
            pcmDir.mkdirs();
        }

        File mp3File = new File(copy_audio_path + "mp3" + File.separator + sid + suffix_mp3);
        File pcmFile = new File(copy_audio_path + "pcm" + File.separator + sid + suffix_pcm);
        boolean bothSourcesKnown = Objects.nonNull(ft.getMp3File()) && Objects.nonNull(ft.getPcmFile());
        if (!bothSourcesKnown || mp3File.exists() || pcmFile.exists()) {
            return;
        }
        try {
            FileCopyUtils.copy(ft.getMp3File(), mp3File);
            FileCopyUtils.copy(ft.getPcmFile(), pcmFile);
        } catch (IOException e) {
            // Keep processing the remaining sessions, but do not hide the failure: a half-copied
            // pair is skipped on later runs because one of the target files already exists.
            System.err.println("copy audio failed, sessionId=" + sid + " ==> " + e);
        }
    }

    /**
     * Recursively scans {@code file} and registers every {@code .mp3} / {@code .pcm} file in
     * {@code fileMap}, keyed by session id (the file name without its suffix). Despite its
     * name this method copies nothing; it only fills the map.
     *
     * <p>The directory passed by the initial caller is always scanned. Directories below it
     * are scanned only when their absolute path contains one of the names in
     * {@code copyAudioDirectoryList}.
     *
     * @param file    file or directory to scan
     * @param fileMap receives one {@link FileType} per session id; existing entries are updated
     * @param idx     recursion depth of the caller; external callers pass {@code 0}
     */
    public static void copyAudioFile(File file, Map<String, FileType> fileMap, int idx) {
        idx++;
        if (file.isFile()) {
            String fileName = file.getName();
            boolean isMp3 = fileName.endsWith(suffix_mp3);
            if (!isMp3 && !fileName.endsWith(suffix_pcm)) {
                return;
            }
            String suffix = isMp3 ? suffix_mp3 : suffix_pcm;
            String sessionId = fileName.substring(0, fileName.length() - suffix.length());
            FileType ft = fileMap.computeIfAbsent(sessionId,
                    id -> FileType.builder().sessionId(id).build());
            if (isMp3) {
                ft.setMp3File(file);
            } else {
                ft.setPcmFile(file);
            }
            return;
        }

        String dirPath = file.getAbsolutePath();
        // The first level is not filtered; deeper directories must match a configured name.
        if (idx != 1 && copyAudioDirectoryList.stream().noneMatch(dirPath::contains)) {
            return;
        }
        System.out.println("scan path ==> " + dirPath);

        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] children = file.listFiles();
        if (children == null) {
            System.err.println("scan path skipped, not a readable directory ==> " + dirPath);
            return;
        }
        for (File child : children) {
            copyAudioFile(child, fileMap, idx);
        }
    }

    /**
     * The audio files found for one session id. copyAudioFile creates one instance per
     * session id and fills in whichever of the two files it encounters.
     */
    @Data
    @Builder
    @AllArgsConstructor
    @NoArgsConstructor
    public static class FileType {

        // Session id, taken from the audio file name without its suffix.
        private String sessionId;

        // The ".mp3" file of the session; null when none was found.
        private File mp3File;

        // The ".pcm" file of the session; null when none was found.
        private File pcmFile;
    }

    /**
     * Groups the parsed error utterances by text and ranks them by how often they occur.
     *
     * <p>Each returned entry carries the utterance text, its occurrence count, the session ids
     * of all occurrences and the same ids serialised as JSON. Entries are ordered by count,
     * highest first; entries with equal counts stay in first-seen order. The excel export that
     * gives this method its name is currently commented out, so no file is written.
     *
     * @return the grouped utterances, most frequent first
     */
    public static List<AsrCaseDemo> exportErrorCaseNums() {
        // Insertion-ordered so that the ranking below starts from first-seen order.
        Map<String, AsrCaseDemo> byText = new LinkedHashMap<>();
        for (AsrCaseDemo parsed : parseExcel()) {
            AsrCaseDemo known = byText.get(parsed.getText());
            if (known == null) {
                byText.put(parsed.getText(), AsrCaseDemo.builder()
                        .text(parsed.getText())
                        .num(1)
                        .sessionIds(parsed.getSessionIds())
                        .build());
            } else {
                known.setNum(known.getNum() + 1);
                known.getSessionIds().add(parsed.getSessionIds().get(0));
            }
        }

        List<AsrCaseDemo> ranked = new ArrayList<>(byText.values());
        ranked.sort(Comparator.comparing(AsrCaseDemo::getNum).reversed());
        for (AsrCaseDemo entry : ranked) {
            entry.setSessionIdsJson(JSON.toJSONString(entry.getSessionIds()));
        }

        // Disabled export:
        //   String[] headers = {"语料", "错误次数", "sessionIds"};
        //   String[] properties = {"text", "num", "sessionIdsJson"};
        //   ExcelExportUtil.export(headers, properties, ranked, false, work_path+"out-result1");
        return ranked;
    }

    /**
     * Parses the error-utterance excel file ({@code work_path + "错误语料+sessionId.xlsx"}).
     *
     * <p>Column 0 is the utterance text and column 1 the session id. The text is trimmed, a
     * leading {@code oneshotKey} is removed and one trailing Chinese full stop is removed.
     * Rows whose text is empty after this clean-up are skipped. Parsing stops at the first row
     * that is null, has fewer than two cells, or has no text in column 0.
     *
     * @return one entry per usable row, each holding the cleaned text and a single-element,
     *         mutable session id list
     */
    public static List<AsrCaseDemo> parseExcel() {
        List<AsrCaseDemo> retList = Lists.newArrayList();
        List<String[]> list = ExcelReader.getExcelData(new File(work_path + "错误语料+sessionId.xlsx"), 1);
        for (String[] array : list) {
            if (Objects.isNull(array) || array.length < 2 || !StringUtils.hasText(array[0])) {
                // the first incomplete row ends the data
                break;
            }
            String text = array[0].trim();
            String sessionId = array[1];
            if (text.startsWith(oneshotKey)) {
                // use the key's own length so that changing oneshotKey cannot cut at the wrong place
                text = text.substring(oneshotKey.length());
            }
            if (text.endsWith(juhao)) {
                // cut the trailing full stop; replaceFirst would remove the first full stop in the
                // text, which is a different one when the utterance contains several
                text = text.substring(0, text.length() - juhao.length());
            }
            if (StringUtils.hasText(text)) {
                retList.add(
                        AsrCaseDemo.builder()
                                .text(text)
                                .sessionIds(Lists.newArrayList(sessionId))
                                .build()
                );
            }
        }
        return retList;
    }

    /**
     * One row of the joined result built by calcSuccessRate: a labelled utterance together
     * with the ASR result that was found for the same production session id.
     */
    @Data
    @Builder
    @AllArgsConstructor
    @NoArgsConstructor
    public static class AsrRetTmp {
        // Labelled text (column 0 of the labelled excel file).
        private String tts;

        // Text returned by the ASR run; null until a matching result row is found.
        private String asrRet;

        // Session id taken from the ASR result file; exported under the header "test-sessionId".
        private String sessionId;

        private String correct;// whether the recognition is correct: "true"/"false", null when no result matched

        private String prodSessionId;// session id of the corresponding production request
    }

    /**
     * An error utterance. The same class is used for three shapes of data in this file: a
     * single parsed row (text + one session id in sessionIds), a grouped entry (text + num +
     * all session ids), and a per-session export row (text + sessionId).
     */
    @Data
    @Builder
    @AllArgsConstructor
    @NoArgsConstructor
    public static class AsrCaseDemo {

        // Utterance text.
        private String text;

        // How many times the utterance occurs.
        private Integer num;

        private Integer correctNum;// number of occurrences that were recognised correctly

        private String correctRate;// success rate, formatted with a trailing "%"

        // Session ids of all occurrences of the utterance.
        private List<String> sessionIds;

        // sessionIds serialised as a JSON array, used as an excel cell value.
        private String sessionIdsJson;

        private boolean hotDict;// whether a hot word has been added for the utterance

        // Single session id, used for the per-session export rows.
        private String sessionId;
    }
}
